The objectives are
# A negative `warn` level makes R ignore every warning for the rest of the
# session, so the package-loading and plotting calls below run silently.
options(warn = -1)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.1 v purrr 0.3.4
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
library(htmlwidgets)
library(ggcorrplot)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggpubr)
library(readr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:ggpubr':
##
## gghistogram
library(fmsb)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
require(maps)
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
require(viridis)
## Loading required package: viridis
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
# Read the player table from the CSV in the working directory, keep a second
# copy of it, and preview the first six rows.
dataset <- read.csv(file = "players_21.csv")
copy_dataset <- dataset
head(dataset, n = 6L)
## sofifa_id player_url
## 1 158023 https://sofifa.com/player/158023/lionel-messi/210002
## 2 20801 https://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/210002
## 3 200389 https://sofifa.com/player/200389/jan-oblak/210002
## 4 188545 https://sofifa.com/player/188545/robert-lewandowski/210002
## 5 190871 https://sofifa.com/player/190871/neymar-da-silva-santos-jr/210002
## 6 192985 https://sofifa.com/player/192985/kevin-de-bruyne/210002
## short_name long_name age dob
## 1 L. Messi Lionel Andrés Messi Cuccittini 33 1987-06-24
## 2 Cristiano Ronaldo Cristiano Ronaldo dos Santos Aveiro 35 1985-02-05
## 3 J. Oblak Jan Oblak 27 1993-01-07
## 4 R. Lewandowski Robert Lewandowski 31 1988-08-21
## 5 Neymar Jr Neymar da Silva Santos Júnior 28 1992-02-05
## 6 K. De Bruyne Kevin De Bruyne 29 1991-06-28
## height_cm weight_kg nationality club_name league_name
## 1 170 72 Argentina FC Barcelona Spain Primera Division
## 2 187 83 Portugal Juventus Italian Serie A
## 3 188 87 Slovenia Atlético Madrid Spain Primera Division
## 4 184 80 Poland FC Bayern München German 1. Bundesliga
## 5 175 68 Brazil Paris Saint-Germain French Ligue 1
## 6 181 70 Belgium Manchester City English Premier League
## league_rank overall potential value_eur wage_eur player_positions
## 1 1 93 93 67500000 560000 RW, ST, CF
## 2 1 92 92 46000000 220000 ST, LW
## 3 1 91 93 75000000 125000 GK
## 4 1 91 91 80000000 240000 ST
## 5 1 91 91 90000000 270000 LW, CAM
## 6 1 91 91 87000000 370000 CAM, CM
## preferred_foot international_reputation weak_foot skill_moves work_rate
## 1 Left 5 4 4 Medium/Low
## 2 Right 5 4 5 High/Low
## 3 Right 3 3 1 Medium/Medium
## 4 Right 4 4 4 High/Medium
## 5 Right 5 5 5 High/Medium
## 6 Right 4 5 4 High/High
## body_type real_face release_clause_eur
## 1 Messi Yes 138400000
## 2 C. Ronaldo Yes 75900000
## 3 PLAYER_BODY_TYPE_259 Yes 159400000
## 4 PLAYER_BODY_TYPE_276 Yes 132000000
## 5 Neymar Yes 166500000
## 6 PLAYER_BODY_TYPE_321 Yes 161000000
## player_tags
## 1 #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 2 #Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward
## 3
## 4 #Distance Shooter, #Clinical Finisher
## 5 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 6 #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
## team_position team_jersey_number loaned_from joined contract_valid_until
## 1 CAM 10 2004-07-01 2021
## 2 LS 7 2018-07-10 2022
## 3 GK 13 2014-07-16 2023
## 4 ST 9 2014-07-01 2023
## 5 LW 10 2017-08-03 2022
## 6 RCM 17 2015-08-30 2023
## nation_position nation_jersey_number pace shooting passing dribbling
## 1 RW 10 85 92 91 95
## 2 LS 7 89 93 81 89
## 3 GK 1 NA NA NA NA
## 4 NA 78 91 78 85
## 5 NA 91 85 86 94
## 6 RCM 7 76 86 93 88
## defending physic gk_diving gk_handling gk_kicking gk_reflexes gk_speed
## 1 38 65 NA NA NA NA NA
## 2 35 77 NA NA NA NA NA
## 3 NA NA 87 92 78 90 52
## 4 43 82 NA NA NA NA NA
## 5 36 59 NA NA NA NA NA
## 6 64 78 NA NA NA NA NA
## gk_positioning
## 1 NA
## 2 NA
## 3 90
## 4 NA
## 5 NA
## 6 NA
## player_traits
## 1 Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Team Player, Chip Shot (AI)
## 2 Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot
## 3 GK Long Throw, Comes For Crosses
## 4 Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)
## 5 Injury Prone, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 6 Injury Prone, Leadership, Early Crosser, Long Passer (AI), Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot
## attacking_crossing attacking_finishing attacking_heading_accuracy
## 1 85 95 70
## 2 84 95 90
## 3 13 11 15
## 4 71 94 85
## 5 85 87 62
## 6 94 82 55
## attacking_short_passing attacking_volleys skill_dribbling skill_curve
## 1 91 88 96 93
## 2 82 86 88 81
## 3 43 13 12 13
## 4 84 89 85 79
## 5 87 87 95 88
## 6 94 82 88 85
## skill_fk_accuracy skill_long_passing skill_ball_control movement_acceleration
## 1 94 91 96 91
## 2 76 77 92 87
## 3 14 40 30 43
## 4 85 70 88 77
## 5 89 81 95 94
## 6 83 93 92 77
## movement_sprint_speed movement_agility movement_reactions movement_balance
## 1 80 91 94 95
## 2 91 87 95 71
## 3 60 67 88 49
## 4 78 77 93 82
## 5 89 96 91 83
## 6 76 78 91 76
## power_shot_power power_jumping power_stamina power_strength power_long_shots
## 1 86 68 72 69 94
## 2 94 95 84 78 93
## 3 59 78 41 78 12
## 4 89 84 76 86 85
## 5 80 62 81 50 84
## 6 91 63 89 74 91
## mentality_aggression mentality_interceptions mentality_positioning
## 1 44 40 93
## 2 63 29 95
## 3 34 19 11
## 4 81 49 94
## 5 51 36 87
## 6 76 66 88
## mentality_vision mentality_penalties mentality_composure defending_marking
## 1 95 75 96 NA
## 2 82 84 95 NA
## 3 65 11 68 NA
## 4 79 88 88 NA
## 5 90 92 93 NA
## 6 94 84 91 NA
## defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 1 35 24 6
## 2 32 24 7
## 3 12 18 87
## 4 42 19 15
## 5 30 29 9
## 6 65 53 15
## goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 1 11 15 14
## 2 11 15 14
## 3 92 78 90
## 4 6 12 8
## 5 9 15 15
## 6 13 5 10
## goalkeeping_reflexes ls st rs lw lf cf rf rw lam cam ram
## 1 8 89+3 89+3 89+3 92+0 93+0 93+0 93+0 92+0 93+0 93+0 93+0
## 2 11 91+1 91+1 91+1 89+0 91+0 91+0 91+0 89+0 88+3 88+3 88+3
## 3 90 33+3 33+3 33+3 32+0 35+0 35+0 35+0 32+0 38+3 38+3 38+3
## 4 10 89+2 89+2 89+2 85+0 87+0 87+0 87+0 85+0 85+3 85+3 85+3
## 5 11 84+3 84+3 84+3 90+0 89+0 89+0 89+0 90+0 90+1 90+1 90+1
## 6 13 83+3 83+3 83+3 88+0 88+0 88+0 88+0 88+0 89+2 89+2 89+2
## lm lcm cm rcm rm lwb ldm cdm rdm rwb lb lcb cb rcb rb
## 1 91+2 87+3 87+3 87+3 91+2 66+3 65+3 65+3 65+3 66+3 62+3 52+3 52+3 52+3 62+3
## 2 88+3 81+3 81+3 81+3 88+3 65+3 61+3 61+3 61+3 65+3 61+3 54+3 54+3 54+3 61+3
## 3 35+3 38+3 38+3 38+3 35+3 32+3 36+3 36+3 36+3 32+3 32+3 33+3 33+3 33+3 32+3
## 4 83+3 79+3 79+3 79+3 83+3 64+3 65+3 65+3 65+3 64+3 61+3 60+3 60+3 60+3 61+3
## 5 90+1 83+3 83+3 83+3 90+1 67+3 62+3 62+3 62+3 67+3 62+3 49+3 49+3 49+3 62+3
## 6 89+2 89+2 89+2 89+2 89+2 79+3 80+3 80+3 80+3 79+3 75+3 69+3 69+3 69+3 75+3
# Compact overview: type and leading values of every column.
str(object = dataset)
## 'data.frame': 18944 obs. of 106 variables:
## $ sofifa_id : int 158023 20801 200389 188545 190871 192985 231747 192448 203376 212831 ...
## $ player_url : chr "https://sofifa.com/player/158023/lionel-messi/210002" "https://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/210002" "https://sofifa.com/player/200389/jan-oblak/210002" "https://sofifa.com/player/188545/robert-lewandowski/210002" ...
## $ short_name : chr "L. Messi" "Cristiano Ronaldo" "J. Oblak" "R. Lewandowski" ...
## $ long_name : chr "Lionel Andrés Messi Cuccittini" "Cristiano Ronaldo dos Santos Aveiro" "Jan Oblak" "Robert Lewandowski" ...
## $ age : int 33 35 27 31 28 29 21 28 28 27 ...
## $ dob : chr "1987-06-24" "1985-02-05" "1993-01-07" "1988-08-21" ...
## $ height_cm : int 170 187 188 184 175 181 178 187 193 191 ...
## $ weight_kg : int 72 83 87 80 68 70 73 85 92 91 ...
## $ nationality : chr "Argentina" "Portugal" "Slovenia" "Poland" ...
## $ club_name : chr "FC Barcelona" "Juventus" "Atlético Madrid" "FC Bayern München" ...
## $ league_name : chr "Spain Primera Division" "Italian Serie A" "Spain Primera Division" "German 1. Bundesliga" ...
## $ league_rank : int 1 1 1 1 1 1 1 1 1 1 ...
## $ overall : int 93 92 91 91 91 91 90 90 90 90 ...
## $ potential : int 93 92 93 91 91 91 95 93 91 91 ...
## $ value_eur : int 67500000 46000000 75000000 80000000 90000000 87000000 105500000 69500000 75500000 62500000 ...
## $ wage_eur : int 560000 220000 125000 240000 270000 370000 160000 260000 210000 160000 ...
## $ player_positions : chr "RW, ST, CF" "ST, LW" "GK" "ST" ...
## $ preferred_foot : chr "Left" "Right" "Right" "Right" ...
## $ international_reputation : int 5 5 3 4 5 4 3 3 3 3 ...
## $ weak_foot : int 4 4 3 4 5 5 4 4 3 3 ...
## $ skill_moves : int 4 5 1 4 5 4 5 1 2 1 ...
## $ work_rate : chr "Medium/Low" "High/Low" "Medium/Medium" "High/Medium" ...
## $ body_type : chr "Messi" "C. Ronaldo" "PLAYER_BODY_TYPE_259" "PLAYER_BODY_TYPE_276" ...
## $ real_face : chr "Yes" "Yes" "Yes" "Yes" ...
## $ release_clause_eur : int 138400000 75900000 159400000 132000000 166500000 161000000 203100000 147700000 145300000 120300000 ...
## $ player_tags : chr "#Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward" "#Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward" "" "#Distance Shooter, #Clinical Finisher" ...
## $ team_position : chr "CAM" "LS" "GK" "ST" ...
## $ team_jersey_number : int 10 7 13 9 10 17 7 1 4 1 ...
## $ loaned_from : chr "" "" "" "" ...
## $ joined : chr "2004-07-01" "2018-07-10" "2014-07-16" "2014-07-01" ...
## $ contract_valid_until : int 2021 2022 2023 2023 2022 2023 2022 2022 2023 2024 ...
## $ nation_position : chr "RW" "LS" "GK" "" ...
## $ nation_jersey_number : int 10 7 1 NA NA 7 10 22 4 NA ...
## $ pace : int 85 89 NA 78 91 76 96 NA 76 NA ...
## $ shooting : int 92 93 NA 91 85 86 86 NA 60 NA ...
## $ passing : int 91 81 NA 78 86 93 78 NA 71 NA ...
## $ dribbling : int 95 89 NA 85 94 88 91 NA 71 NA ...
## $ defending : int 38 35 NA 43 36 64 39 NA 91 NA ...
## $ physic : int 65 77 NA 82 59 78 76 NA 86 NA ...
## $ gk_diving : int NA NA 87 NA NA NA NA 88 NA 86 ...
## $ gk_handling : int NA NA 92 NA NA NA NA 85 NA 88 ...
## $ gk_kicking : int NA NA 78 NA NA NA NA 88 NA 85 ...
## $ gk_reflexes : int NA NA 90 NA NA NA NA 90 NA 89 ...
## $ gk_speed : int NA NA 52 NA NA NA NA 45 NA 51 ...
## $ gk_positioning : int NA NA 90 NA NA NA NA 88 NA 91 ...
## $ player_traits : chr "Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Te"| __truncated__ "Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot" "GK Long Throw, Comes For Crosses" "Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)" ...
## $ attacking_crossing : int 85 84 13 71 85 94 78 18 53 17 ...
## $ attacking_finishing : int 95 95 11 94 87 82 91 14 52 13 ...
## $ attacking_heading_accuracy: int 70 90 15 85 62 55 73 11 87 19 ...
## $ attacking_short_passing : int 91 82 43 84 87 94 83 61 79 45 ...
## $ attacking_volleys : int 88 86 13 89 87 82 83 14 45 20 ...
## $ skill_dribbling : int 96 88 12 85 95 88 92 21 70 27 ...
## $ skill_curve : int 93 81 13 79 88 85 79 18 60 19 ...
## $ skill_fk_accuracy : int 94 76 14 85 89 83 63 12 70 18 ...
## $ skill_long_passing : int 91 77 40 70 81 93 70 63 86 44 ...
## $ skill_ball_control : int 96 92 30 88 95 92 90 30 77 30 ...
## $ movement_acceleration : int 91 87 43 77 94 77 96 38 72 56 ...
## $ movement_sprint_speed : int 80 91 60 78 89 76 96 50 79 47 ...
## $ movement_agility : int 91 87 67 77 96 78 92 37 61 40 ...
## $ movement_reactions : int 94 95 88 93 91 91 92 86 89 88 ...
## $ movement_balance : int 95 71 49 82 83 76 82 43 53 37 ...
## $ power_shot_power : int 86 94 59 89 80 91 86 66 81 64 ...
## $ power_jumping : int 68 95 78 84 62 63 77 79 90 52 ...
## $ power_stamina : int 72 84 41 76 81 89 86 35 75 32 ...
## $ power_strength : int 69 78 78 86 50 74 76 78 92 78 ...
## $ power_long_shots : int 94 93 12 85 84 91 79 10 64 14 ...
## $ mentality_aggression : int 44 63 34 81 51 76 62 43 83 27 ...
## $ mentality_interceptions : int 40 29 19 49 36 66 38 22 90 11 ...
## $ mentality_positioning : int 93 95 11 94 87 88 91 11 47 13 ...
## $ mentality_vision : int 95 82 65 79 90 94 80 70 65 66 ...
## $ mentality_penalties : int 75 84 11 88 92 84 70 25 62 23 ...
## $ mentality_composure : int 96 95 68 88 93 91 84 70 90 65 ...
## $ defending_marking : logi NA NA NA NA NA NA ...
## $ defending_standing_tackle : int 35 32 12 42 30 65 34 13 93 19 ...
## $ defending_sliding_tackle : int 24 24 18 19 29 53 32 10 86 16 ...
## $ goalkeeping_diving : int 6 7 87 15 9 15 13 88 13 86 ...
## $ goalkeeping_handling : int 11 11 92 6 9 13 5 85 10 88 ...
## $ goalkeeping_kicking : int 15 15 78 12 15 5 7 88 13 85 ...
## $ goalkeeping_positioning : int 14 14 90 8 15 10 11 88 11 91 ...
## $ goalkeeping_reflexes : int 8 11 90 10 11 13 6 90 11 89 ...
## $ ls : chr "89+3" "91+1" "33+3" "89+2" ...
## $ st : chr "89+3" "91+1" "33+3" "89+2" ...
## $ rs : chr "89+3" "91+1" "33+3" "89+2" ...
## $ lw : chr "92+0" "89+0" "32+0" "85+0" ...
## $ lf : chr "93+0" "91+0" "35+0" "87+0" ...
## $ cf : chr "93+0" "91+0" "35+0" "87+0" ...
## $ rf : chr "93+0" "91+0" "35+0" "87+0" ...
## $ rw : chr "92+0" "89+0" "32+0" "85+0" ...
## $ lam : chr "93+0" "88+3" "38+3" "85+3" ...
## $ cam : chr "93+0" "88+3" "38+3" "85+3" ...
## $ ram : chr "93+0" "88+3" "38+3" "85+3" ...
## $ lm : chr "91+2" "88+3" "35+3" "83+3" ...
## $ lcm : chr "87+3" "81+3" "38+3" "79+3" ...
## $ cm : chr "87+3" "81+3" "38+3" "79+3" ...
## $ rcm : chr "87+3" "81+3" "38+3" "79+3" ...
## $ rm : chr "91+2" "88+3" "35+3" "83+3" ...
## $ lwb : chr "66+3" "65+3" "32+3" "64+3" ...
## $ ldm : chr "65+3" "61+3" "36+3" "65+3" ...
## $ cdm : chr "65+3" "61+3" "36+3" "65+3" ...
## [list output truncated]
# dim() returns c(number of rows, number of columns); report both in one line.
dimensions <- dim(dataset)
paste("Rows :", dimensions[[1]], " Columns :", dimensions[[2]])
## [1] "Rows : 18944 Columns : 106"
# Per-column summary: quantiles and NA counts for numeric columns,
# length/class/mode for character columns.
summary(object = dataset)
## sofifa_id player_url short_name long_name
## Min. : 41 Length:18944 Length:18944 Length:18944
## 1st Qu.:210031 Class :character Class :character Class :character
## Median :232315 Mode :character Mode :character Mode :character
## Mean :226242
## 3rd Qu.:246760
## Max. :258970
##
## age dob height_cm weight_kg
## Min. :16.00 Length:18944 Min. :155.0 Min. : 50.00
## 1st Qu.:21.00 Class :character 1st Qu.:176.0 1st Qu.: 70.00
## Median :25.00 Mode :character Median :181.0 Median : 75.00
## Mean :25.23 Mean :181.2 Mean : 75.02
## 3rd Qu.:29.00 3rd Qu.:186.0 3rd Qu.: 80.00
## Max. :53.00 Max. :206.0 Max. :110.00
##
## nationality club_name league_name league_rank
## Length:18944 Length:18944 Length:18944 Min. :1.000
## Class :character Class :character Class :character 1st Qu.:1.000
## Mode :character Mode :character Mode :character Median :1.000
## Mean :1.357
## 3rd Qu.:1.000
## Max. :4.000
## NA's :225
## overall potential value_eur wage_eur
## Min. :47.00 Min. :47.00 Min. : 0 Min. : 0
## 1st Qu.:61.00 1st Qu.:67.00 1st Qu.: 300000 1st Qu.: 1000
## Median :66.00 Median :71.00 Median : 650000 Median : 3000
## Mean :65.68 Mean :71.09 Mean : 2224813 Mean : 8676
## 3rd Qu.:70.00 3rd Qu.:75.00 3rd Qu.: 1800000 3rd Qu.: 7000
## Max. :93.00 Max. :95.00 Max. :105500000 Max. :560000
##
## player_positions preferred_foot international_reputation weak_foot
## Length:18944 Length:18944 Min. :1.000 Min. :1.000
## Class :character Class :character 1st Qu.:1.000 1st Qu.:3.000
## Mode :character Mode :character Median :1.000 Median :3.000
## Mean :1.092 Mean :2.937
## 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000
##
## skill_moves work_rate body_type real_face
## Min. :1.000 Length:18944 Length:18944 Length:18944
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :2.363
## 3rd Qu.:3.000
## Max. :5.000
##
## release_clause_eur player_tags team_position team_jersey_number
## Min. : 9000 Length:18944 Length:18944 Min. : 1.00
## 1st Qu.: 525000 Class :character Class :character 1st Qu.: 9.00
## Median : 1100000 Mode :character Mode :character Median :18.00
## Mean : 4296353 Mean :20.59
## 3rd Qu.: 3200000 3rd Qu.:27.00
## Max. :203100000 Max. :99.00
## NA's :995 NA's :225
## loaned_from joined contract_valid_until nation_position
## Length:18944 Length:18944 Min. :2020 Length:18944
## Class :character Class :character 1st Qu.:2021 Class :character
## Mode :character Mode :character Median :2022 Mode :character
## Mean :2022
## 3rd Qu.:2023
## Max. :2028
## NA's :225
## nation_jersey_number pace shooting passing
## Min. : 1.00 Min. :25.00 Min. :16.00 Min. :25.00
## 1st Qu.: 6.00 1st Qu.:62.00 1st Qu.:42.00 1st Qu.:50.00
## Median :12.00 Median :68.00 Median :54.00 Median :58.00
## Mean :12.03 Mean :67.67 Mean :52.27 Mean :57.14
## 3rd Qu.:18.00 3rd Qu.:75.00 3rd Qu.:63.00 3rd Qu.:64.00
## Max. :27.00 Max. :96.00 Max. :93.00 Max. :93.00
## NA's :17817 NA's :2083 NA's :2083 NA's :2083
## dribbling defending physic gk_diving
## Min. :25.00 Min. :15.00 Min. :28.00 Min. :45.00
## 1st Qu.:57.00 1st Qu.:36.00 1st Qu.:58.00 1st Qu.:60.00
## Median :64.00 Median :56.00 Median :66.00 Median :65.00
## Mean :62.46 Mean :51.32 Mean :64.46 Mean :65.16
## 3rd Qu.:69.00 3rd Qu.:64.00 3rd Qu.:72.00 3rd Qu.:70.00
## Max. :95.00 Max. :91.00 Max. :91.00 Max. :90.00
## NA's :2083 NA's :2083 NA's :2083 NA's :16861
## gk_handling gk_kicking gk_reflexes gk_speed
## Min. :43.00 Min. :35.00 Min. :44.00 Min. :12.0
## 1st Qu.:58.00 1st Qu.:57.00 1st Qu.:60.00 1st Qu.:28.0
## Median :63.00 Median :61.00 Median :66.00 Median :37.0
## Mean :62.89 Mean :61.72 Mean :66.11 Mean :37.2
## 3rd Qu.:68.00 3rd Qu.:66.00 3rd Qu.:72.00 3rd Qu.:45.0
## Max. :92.00 Max. :93.00 Max. :90.00 Max. :65.0
## NA's :16861 NA's :16861 NA's :16861 NA's :16861
## gk_positioning player_traits attacking_crossing attacking_finishing
## Min. :38.00 Length:18944 Min. : 6.00 Min. : 3.0
## 1st Qu.:57.00 Class :character 1st Qu.:38.00 1st Qu.:30.0
## Median :63.00 Mode :character Median :54.00 Median :49.0
## Mean :63.17 Mean :49.61 Mean :45.8
## 3rd Qu.:69.00 3rd Qu.:63.00 3rd Qu.:62.0
## Max. :91.00 Max. :94.00 Max. :95.0
## NA's :16861
## attacking_heading_accuracy attacking_short_passing attacking_volleys
## Min. : 5.00 Min. : 7.00 Min. : 3.00
## 1st Qu.:44.00 1st Qu.:54.00 1st Qu.:30.00
## Median :55.00 Median :62.00 Median :44.00
## Mean :51.87 Mean :58.71 Mean :42.67
## 3rd Qu.:64.00 3rd Qu.:68.00 3rd Qu.:56.00
## Max. :93.00 Max. :94.00 Max. :90.00
##
## skill_dribbling skill_curve skill_fk_accuracy skill_long_passing
## Min. : 5.00 Min. : 4.00 Min. : 5.00 Min. : 5.00
## 1st Qu.:49.00 1st Qu.:35.00 1st Qu.:31.00 1st Qu.:43.00
## Median :61.00 Median :48.00 Median :41.00 Median :56.00
## Mean :55.55 Mean :47.19 Mean :42.36 Mean :52.65
## 3rd Qu.:68.00 3rd Qu.:61.00 3rd Qu.:55.00 3rd Qu.:64.00
## Max. :96.00 Max. :94.00 Max. :94.00 Max. :93.00
##
## skill_ball_control movement_acceleration movement_sprint_speed
## Min. : 5.00 Min. :13.00 Min. :12.00
## 1st Qu.:54.00 1st Qu.:57.00 1st Qu.:57.00
## Median :63.00 Median :67.00 Median :67.00
## Mean :58.48 Mean :64.29 Mean :64.33
## 3rd Qu.:69.00 3rd Qu.:74.00 3rd Qu.:74.00
## Max. :96.00 Max. :97.00 Max. :96.00
##
## movement_agility movement_reactions movement_balance power_shot_power
## Min. :14.00 Min. :24.00 Min. :12.00 Min. :18.00
## 1st Qu.:55.00 1st Qu.:56.00 1st Qu.:56.00 1st Qu.:48.00
## Median :66.00 Median :62.00 Median :66.00 Median :59.00
## Mean :63.33 Mean :61.61 Mean :63.92 Mean :57.75
## 3rd Qu.:74.00 3rd Qu.:68.00 3rd Qu.:74.00 3rd Qu.:68.00
## Max. :96.00 Max. :95.00 Max. :97.00 Max. :95.00
##
## power_jumping power_stamina power_strength power_long_shots
## Min. :15.00 Min. :12.0 Min. :16.00 Min. : 4.00
## 1st Qu.:58.00 1st Qu.:55.0 1st Qu.:57.00 1st Qu.:32.00
## Median :65.00 Median :66.0 Median :66.00 Median :51.00
## Mean :64.59 Mean :62.6 Mean :64.74 Mean :46.76
## 3rd Qu.:73.00 3rd Qu.:73.0 3rd Qu.:74.00 3rd Qu.:62.00
## Max. :95.00 Max. :97.0 Max. :97.00 Max. :94.00
##
## mentality_aggression mentality_interceptions mentality_positioning
## Min. : 9.00 Min. : 3.00 Min. : 2.00
## 1st Qu.:44.00 1st Qu.:25.00 1st Qu.:40.00
## Median :58.00 Median :52.00 Median :55.00
## Mean :55.49 Mean :46.25 Mean :50.26
## 3rd Qu.:68.00 3rd Qu.:64.00 3rd Qu.:64.00
## Max. :96.00 Max. :91.00 Max. :95.00
##
## mentality_vision mentality_penalties mentality_composure defending_marking
## Min. : 9.00 Min. : 6.00 Min. :12.00 Mode:logical
## 1st Qu.:45.00 1st Qu.:38.75 1st Qu.:50.00 NA's:18944
## Median :55.00 Median :49.00 Median :59.00
## Mean :53.83 Mean :48.05 Mean :57.98
## 3rd Qu.:64.00 3rd Qu.:60.00 3rd Qu.:66.00
## Max. :95.00 Max. :92.00 Max. :96.00
##
## defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## Min. : 5.00 Min. : 4.00 Min. : 1.00
## 1st Qu.:27.00 1st Qu.:24.00 1st Qu.: 8.00
## Median :55.00 Median :52.00 Median :11.00
## Mean :47.58 Mean :45.55 Mean :16.45
## 3rd Qu.:65.00 3rd Qu.:63.00 3rd Qu.:14.00
## Max. :93.00 Max. :90.00 Max. :90.00
##
## goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## Min. : 1.00 Min. : 1.0 Min. : 1.00
## 1st Qu.: 8.00 1st Qu.: 8.0 1st Qu.: 8.00
## Median :11.00 Median :11.0 Median :11.00
## Mean :16.24 Mean :16.1 Mean :16.23
## 3rd Qu.:14.00 3rd Qu.:14.0 3rd Qu.:14.00
## Max. :92.00 Max. :93.0 Max. :91.00
##
## goalkeeping_reflexes ls st rs
## Min. : 1.00 Length:18944 Length:18944 Length:18944
## 1st Qu.: 8.00 Class :character Class :character Class :character
## Median :11.00 Mode :character Mode :character Mode :character
## Mean :16.55
## 3rd Qu.:14.00
## Max. :90.00
##
## lw lf cf rf
## Length:18944 Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rw lam cam ram
## Length:18944 Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## lm lcm cm rcm
## Length:18944 Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rm lwb ldm cdm
## Length:18944 Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rdm rwb lb lcb
## Length:18944 Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## cb rcb rb
## Length:18944 Length:18944 Length:18944
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
# Remove six columns (profile URL, birth date, body type, face flag and the
# two jersey numbers) and refresh the second copy so it matches the trimmed
# table.
dataset <- dataset[setdiff(
  names(dataset),
  c(
    "player_url", "dob", "body_type", "real_face",
    "team_jersey_number", "nation_jersey_number"
  )
)]
copy_dataset <- dataset
# Histogram of player ages (one bin per year) with a dashed red vertical line
# at the mean age, followed by a boxplot of the same variable.
ggplot(dataset, aes(x = age)) +
  geom_histogram(
    binwidth = 1, stat = "bin", color = "black", fill = "#0066CC"
  ) +
  geom_vline(
    aes(xintercept = mean(age)),
    color = "#CC0000", linetype = "dashed", size = 1
  ) +
  # The y-axis label was misspelled ("Frequnecy") in the rendered figure.
  labs(title = "Age Distribution", x = "Age Groups", y = "Frequency")
ggplot(dataset, aes(x = age)) +
  geom_boxplot()
From the histogram above, the age distribution is roughly bell-shaped (approximately normal), with few very young and few very old players. The boxplot shows that the outliers lie among the older ages rather than among the youngsters. Half of the footballers are between 21 and 29 years old (the interquartile range), with a median age of 25.
# Histogram of overall ratings (one bin per rating point) with a dashed red
# vertical line at the mean rating, followed by a boxplot of the same variable.
ggplot(dataset, aes(x = overall)) +
  geom_histogram(
    binwidth = 1, stat = "bin", color = "black", fill = "#D7BDE2"
  ) +
  geom_vline(
    aes(xintercept = mean(overall)),
    color = "#CC0000", linetype = "dashed", size = 1
  ) +
  # The y-axis label was misspelled ("Frequnecy") in the rendered figure.
  labs(title = "Overall Rating Distribution", x = "Rating", y = "Frequency")
ggplot(dataset, aes(x = overall)) +
  geom_boxplot()
From the histogram above, we can see that the overall rating is approximately normally distributed. The boxplot shows outliers at both tails, indicating the presence of a few extremely good players and a few very low-rated players. The mean overall rating is about 66, and half of the players are rated between 61 and 70.
# Radar charts of the six headline attributes for ten hand-picked rows of
# `dataset` (the row numbers skip some of the leading rows; the accompanying
# text describes them as the top 10 players excluding goalkeepers).
# Columns are selected by name rather than by position so the selection does
# not silently shift if the set of dropped columns changes.
top_10_players <- dataset[
  c(1, 2, 4:7, 9, 11:12, 14),
  c(
    "short_name", "pace", "shooting", "passing",
    "dribbling", "defending", "physic"
  )
]
# Use the short name as row label, then drop it so only numeric axes remain.
rownames(top_10_players) <- top_10_players$short_name
top_10_players <- subset(top_10_players, select = -c(short_name))
colors <- c(
  "#00AFBB", "#E7B800", "#FC4E07", "#6C3483", "#A93226",
  "#EAF2F8", "#0B5345", "#2471A3", "#2ECC71", "#C0392B"
)
titles <- rownames(top_10_players)
# NOTE(review): create_beautiful_radarchart() is not defined in this file;
# presumably fun1.R provides it - confirm.
source("fun1.R")
# The first two rows of the plotting frame carry the axis maximum (100) and
# minimum (30) for every attribute; the player rows follow.
max_min <- data.frame(
  pace = c(100, 30), shooting = c(100, 30), passing = c(100, 30),
  dribbling = c(100, 30), defending = c(100, 30), physic = c(100, 30)
)
rownames(max_min) <- c("Max", "Min")
colnames(max_min) <- colnames(top_10_players)
df <- rbind(max_min, top_10_players)
# Set both graphics parameters in a single par() call so that `op` records
# the previous values of *both*; restoring only `mar` would leave the 2 x 3
# panel layout active for every later base-graphics plot.
op <- par(mar = c(1, 1, 1, 1), mfrow = c(2, 3))
for (i in seq_len(nrow(top_10_players))) {
  # Rows 1-2 are the Max/Min bounds; row i + 2 is the i-th player.
  create_beautiful_radarchart(
    data = df[c(1, 2, i + 2), ], caxislabels = c(30, 47, 65, 82, 100),
    color = colors[i], title = titles[i]
  )
}
par(op)
The graphs above show the distribution of individual attributes for the top 10 players, excluding goalkeepers. They show how players' strengths vary depending on their positions.
This question aims to find out which clubs are more likely to spend on high-quality players and to be active in the transfer market.
# Keep only the rows whose club name is not the empty string, then count how
# many players each club has (columns Var1 = club, Freq = number of players).
club_dataset <- dataset[dataset$club_name != "", ]
no_Players <- data.frame(table(club_dataset$club_name))
head(no_Players, n = 6L)
## Var1 Freq
## 1 1. FC Heidenheim 1846 30
## 2 1. FC Köln 30
## 3 1. FC Kaiserslautern 28
## 4 1. FC Magdeburg 27
## 5 1. FC Nürnberg 30
## 6 1. FC Saarbrücken 26
# Quartiles and mean of the squad sizes.
no_Players_summary <- summary(no_Players[["Freq"]])
no_Players_summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 26.00 28.00 27.49 30.00 33.00
# The mean squad size is the "Mean" entry of the summary.
paste("Each Team has an average of", no_Players_summary[["Mean"]])
## [1] "Each Team has an average of 27.4875183553598"
# Boxplot of the number of players per club.
ggplot(data = no_Players, mapping = aes(x = Freq)) +
  geom_boxplot()
Here we calculate the economic ratio of each player as wage / potential, i.e. how much wage (in EUR) a player earns per point of potential.
# Wage per point of potential for every club player, stored rounded to two
# decimals; preview it next to the columns it was derived from.
economical_ratio <- with(club_dataset, wage_eur / potential)
club_dataset$economic_ratio <- round(economical_ratio, digits = 2)
head(
  club_dataset[, c("short_name", "wage_eur", "potential", "economic_ratio")],
  n = 6L
)
## short_name wage_eur potential economic_ratio
## 1 L. Messi 560000 93 6021.51
## 2 Cristiano Ronaldo 220000 92 2391.30
## 3 J. Oblak 125000 93 1344.09
## 4 R. Lewandowski 240000 91 2637.36
## 5 Neymar Jr 270000 91 2967.03
## 6 K. De Bruyne 370000 91 4065.93
# Mean economic ratio per club. Naming the grouping list element and passing
# a one-column data frame yields the columns club_name / economic_ratio
# directly, so no renaming step is needed afterwards.
club_economical <- aggregate(
  club_dataset["economic_ratio"],
  by = list(club_name = club_dataset$club_name),
  FUN = mean
)
# Highest ratio first, then round the means to two decimals for display.
club_economical <- club_economical[order(-club_economical$economic_ratio), ]
club_economical$economic_ratio <- round(
  club_economical$economic_ratio,
  digits = 2
)
head(club_economical, n = 6L)
## club_name economic_ratio
## 492 Real Madrid 1753.59
## 223 FC Barcelona 1661.11
## 390 Manchester City 1323.29
## 378 Liverpool 1111.06
## 391 Manchester United 1071.96
## 335 Inter 1015.58
# First ten rows of the table sorted by descending economic ratio.
top_10_club_economical <- club_economical[seq_len(10L), ]
top_10_club_economical
## club_name economic_ratio
## 492 Real Madrid 1753.59
## 223 FC Barcelona 1661.11
## 390 Manchester City 1323.29
## 378 Liverpool 1111.06
## 391 Manchester United 1071.96
## 335 Inter 1015.58
## 137 Chelsea 978.01
## 225 FC Bayern München 943.96
## 615 Tottenham Hotspur 917.44
## 453 Paris Saint-Germain 863.12
# Character vector with the names of those ten clubs.
top_10_clubs <- top_10_club_economical[["club_name"]]
top_10_clubs
## [1] "Real Madrid" "FC Barcelona" "Manchester City"
## [4] "Liverpool" "Manchester United" "Inter"
## [7] "Chelsea" "FC Bayern München" "Tottenham Hotspur"
## [10] "Paris Saint-Germain"
# Bar chart of the ten clubs with the highest mean economic ratio, rendered
# as an interactive plotly widget.
# The club axis is reordered by descending ratio (on a temporary copy of the
# data, so `top_10_club_economical` itself is unchanged); a character x
# variable would otherwise be drawn alphabetically, which hides the ranking.
p <- ggplot(
  transform(
    top_10_club_economical,
    club_name = reorder(club_name, -economic_ratio)
  ),
  aes(x = club_name, y = economic_ratio)
) +
  geom_bar(stat = "identity", width = 0.8, fill = "#0066CC") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Clubs with the Top Economic Ratio")
ggplotly(p)
The bar plot above shows the economic ratio of the top 10 clubs (ranked by economic ratio). Real Madrid is the club most likely to spend on high-quality players, followed by FC Barcelona and Manchester City. One limitation of this figure is that it also includes the economic ratio of players who were bought long ago.
# Replace the vector of club names with the player rows belonging to those
# clubs. The verb is namespace-qualified because several attached packages
# export a function called filter().
top_10_clubs <- dplyr::filter(dataset, club_name %in% top_10_clubs)
head(top_10_clubs, n = 6L)
## sofifa_id short_name long_name age height_cm
## 1 158023 L. Messi Lionel Andrés Messi Cuccittini 33 170
## 2 188545 R. Lewandowski Robert Lewandowski 31 184
## 3 190871 Neymar Jr Neymar da Silva Santos Júnior 28 175
## 4 192985 K. De Bruyne Kevin De Bruyne 29 181
## 5 231747 K. Mbappé Kylian Mbappé Lottin 21 178
## 6 192448 M. ter Stegen Marc-André ter Stegen 28 187
## weight_kg nationality club_name league_name league_rank
## 1 72 Argentina FC Barcelona Spain Primera Division 1
## 2 80 Poland FC Bayern München German 1. Bundesliga 1
## 3 68 Brazil Paris Saint-Germain French Ligue 1 1
## 4 70 Belgium Manchester City English Premier League 1
## 5 73 France Paris Saint-Germain French Ligue 1 1
## 6 85 Germany FC Barcelona Spain Primera Division 1
## overall potential value_eur wage_eur player_positions preferred_foot
## 1 93 93 67500000 560000 RW, ST, CF Left
## 2 91 91 80000000 240000 ST Right
## 3 91 91 90000000 270000 LW, CAM Right
## 4 91 91 87000000 370000 CAM, CM Right
## 5 90 95 105500000 160000 ST, LW, RW Right
## 6 90 93 69500000 260000 GK Right
## international_reputation weak_foot skill_moves work_rate
## 1 5 4 4 Medium/Low
## 2 4 4 4 High/Medium
## 3 5 5 5 High/Medium
## 4 4 5 4 High/High
## 5 3 4 5 High/Low
## 6 3 4 1 Medium/Medium
## release_clause_eur
## 1 138400000
## 2 132000000
## 3 166500000
## 4 161000000
## 5 203100000
## 6 147700000
## player_tags
## 1 #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 2 #Distance Shooter, #Clinical Finisher
## 3 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 4 #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
## 5 #Speedster, #Dribbler, #Acrobat
## 6
## team_position loaned_from joined contract_valid_until nation_position
## 1 CAM 2004-07-01 2021 RW
## 2 ST 2014-07-01 2023
## 3 LW 2017-08-03 2022
## 4 RCM 2015-08-30 2023 RCM
## 5 LS 2018-07-01 2022 RM
## 6 GK 2014-07-01 2022 SUB
## pace shooting passing dribbling defending physic gk_diving gk_handling
## 1 85 92 91 95 38 65 NA NA
## 2 78 91 78 85 43 82 NA NA
## 3 91 85 86 94 36 59 NA NA
## 4 76 86 93 88 64 78 NA NA
## 5 96 86 78 91 39 76 NA NA
## 6 NA NA NA NA NA NA 88 85
## gk_kicking gk_reflexes gk_speed gk_positioning
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 88 90 45 88
## player_traits
## 1 Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Team Player, Chip Shot (AI)
## 2 Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)
## 3 Injury Prone, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 4 Injury Prone, Leadership, Early Crosser, Long Passer (AI), Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot
## 5 Finesse Shot, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 6 Rushes Out Of Goal, Comes For Crosses, Saves with Feet
## attacking_crossing attacking_finishing attacking_heading_accuracy
## 1 85 95 70
## 2 71 94 85
## 3 85 87 62
## 4 94 82 55
## 5 78 91 73
## 6 18 14 11
## attacking_short_passing attacking_volleys skill_dribbling skill_curve
## 1 91 88 96 93
## 2 84 89 85 79
## 3 87 87 95 88
## 4 94 82 88 85
## 5 83 83 92 79
## 6 61 14 21 18
## skill_fk_accuracy skill_long_passing skill_ball_control movement_acceleration
## 1 94 91 96 91
## 2 85 70 88 77
## 3 89 81 95 94
## 4 83 93 92 77
## 5 63 70 90 96
## 6 12 63 30 38
## movement_sprint_speed movement_agility movement_reactions movement_balance
## 1 80 91 94 95
## 2 78 77 93 82
## 3 89 96 91 83
## 4 76 78 91 76
## 5 96 92 92 82
## 6 50 37 86 43
## power_shot_power power_jumping power_stamina power_strength power_long_shots
## 1 86 68 72 69 94
## 2 89 84 76 86 85
## 3 80 62 81 50 84
## 4 91 63 89 74 91
## 5 86 77 86 76 79
## 6 66 79 35 78 10
## mentality_aggression mentality_interceptions mentality_positioning
## 1 44 40 93
## 2 81 49 94
## 3 51 36 87
## 4 76 66 88
## 5 62 38 91
## 6 43 22 11
## mentality_vision mentality_penalties mentality_composure defending_marking
## 1 95 75 96 NA
## 2 79 88 88 NA
## 3 90 92 93 NA
## 4 94 84 91 NA
## 5 80 70 84 NA
## 6 70 25 70 NA
## defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 1 35 24 6
## 2 42 19 15
## 3 30 29 9
## 4 65 53 15
## 5 34 32 13
## 6 13 10 88
## goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 1 11 15 14
## 2 6 12 8
## 3 9 15 15
## 4 13 5 10
## 5 5 7 11
## 6 85 88 88
## goalkeeping_reflexes ls st rs lw lf cf rf rw lam cam ram
## 1 8 89+3 89+3 89+3 92+0 93+0 93+0 93+0 92+0 93+0 93+0 93+0
## 2 10 89+2 89+2 89+2 85+0 87+0 87+0 87+0 85+0 85+3 85+3 85+3
## 3 11 84+3 84+3 84+3 90+0 89+0 89+0 89+0 90+0 90+1 90+1 90+1
## 4 13 83+3 83+3 83+3 88+0 88+0 88+0 88+0 88+0 89+2 89+2 89+2
## 5 6 88+3 88+3 88+3 89+0 89+0 89+0 89+0 89+0 87+3 87+3 87+3
## 6 90 35+3 35+3 35+3 34+0 38+0 38+0 38+0 34+0 42+3 42+3 42+3
## lm lcm cm rcm rm lwb ldm cdm rdm rwb lb lcb cb rcb rb
## 1 91+2 87+3 87+3 87+3 91+2 66+3 65+3 65+3 65+3 66+3 62+3 52+3 52+3 52+3 62+3
## 2 83+3 79+3 79+3 79+3 83+3 64+3 65+3 65+3 65+3 64+3 61+3 60+3 60+3 60+3 61+3
## 3 90+1 83+3 83+3 83+3 90+1 67+3 62+3 62+3 62+3 67+3 62+3 49+3 49+3 49+3 62+3
## 4 89+2 89+2 89+2 89+2 89+2 79+3 80+3 80+3 80+3 79+3 75+3 69+3 69+3 69+3 75+3
## 5 87+3 79+3 79+3 79+3 87+3 67+3 63+3 63+3 63+3 67+3 63+3 55+3 55+3 55+3 63+3
## 6 39+3 45+3 45+3 45+3 39+3 33+3 41+3 41+3 41+3 33+3 31+3 33+3 33+3 33+3 31+3
# Reduce the top-club player table to identity, club, rating and money
# columns plus the joining date.
# The columns are kept by name: the earlier positional drop `-c(14:87)` on an
# 88-column frame stopped one column short and left the last position-rating
# column (`rb`) in the result.
top_10_clubs <- top_10_clubs[, c(
  "sofifa_id", "short_name", "long_name", "age",
  "club_name", "league_name", "league_rank", "overall", "potential",
  "value_eur", "wage_eur", "release_clause_eur", "joined"
)]
#View(top_10_clubs)
# Snapshot taken before `joined` is reduced to a year.
copy_top_10_clubs <- top_10_clubs
# Parse the ISO date string and keep only the four-digit year, as character.
top_10_clubs$joined <- format(
  as.Date(top_10_clubs$joined, format = "%Y-%m-%d"),
  "%Y"
)
head(top_10_clubs)
## sofifa_id short_name long_name age
## 1 158023 L. Messi Lionel Andrés Messi Cuccittini 33
## 2 188545 R. Lewandowski Robert Lewandowski 31
## 3 190871 Neymar Jr Neymar da Silva Santos Júnior 28
## 4 192985 K. De Bruyne Kevin De Bruyne 29
## 5 231747 K. Mbappé Kylian Mbappé Lottin 21
## 6 192448 M. ter Stegen Marc-André ter Stegen 28
## club_name league_name league_rank overall potential
## 1 FC Barcelona Spain Primera Division 1 93 93
## 2 FC Bayern München German 1. Bundesliga 1 91 91
## 3 Paris Saint-Germain French Ligue 1 1 91 91
## 4 Manchester City English Premier League 1 91 91
## 5 Paris Saint-Germain French Ligue 1 1 90 95
## 6 FC Barcelona Spain Primera Division 1 90 93
## value_eur wage_eur release_clause_eur joined rb
## 1 67500000 560000 138400000 2004 62+3
## 2 80000000 240000 132000000 2014 61+3
## 3 90000000 270000 166500000 2017 62+3
## 4 87000000 370000 161000000 2015 75+3
## 5 105500000 160000 203100000 2018 63+3
## 6 69500000 260000 147700000 2014 31+3
tail(top_10_clubs)
## sofifa_id short_name long_name age club_name
## 309 256831 D. Cirkin Dennis Cirkin 18 Tottenham Hotspur
## 310 252793 T. Harwood-Bellis Taylor Harwood-Bellis 18 Manchester City
## 311 252794 Adrián Bernabé Adrián Bernabé GarcÃa 19 Manchester City
## 312 240913 C. Kelleher Caoimhin Kelleher 21 Liverpool
## 313 254120 T. Doyle Thomas Doyle 18 Manchester City
## 314 245903 H. White Harvey White 18 Tottenham Hotspur
## league_name league_rank overall potential value_eur wage_eur
## 309 English Premier League 1 61 83 525000 3000
## 310 English Premier League 1 61 82 525000 4000
## 311 English Premier League 1 61 77 550000 9000
## 312 English Premier League 1 61 75 475000 6000
## 313 English Premier League 1 60 82 475000 5000
## 314 English Premier League 1 60 80 400000 3000
## release_clause_eur joined rb
## 309 1500000 2019 59+2
## 310 1500000 2019 59+2
## 311 1400000 2019 49+2
## 312 1200000 2017 21+2
## 313 1400000 2019 55+2
## 314 1200000 2018 60+2
# Players who joined their current club in 2015-2020 (joined is a year string).
transfer_expenditure <- filter(top_10_clubs, joined %in% as.character(2015:2020))
# View(transfer_expenditure)

# Per-player "economic ratio": weekly wage divided by potential rating,
# rounded to two decimals.
teconomical_ratio <- transfer_expenditure$wage_eur / transfer_expenditure$potential
transfer_expenditure$economic_ratio <- round(teconomical_ratio, 2)
# View(transfer_expenditure)

# Mean economic ratio per club; naming the grouping list and passing a
# one-column data frame yields the final column names directly.
Transfer_club_economical <- aggregate(
  transfer_expenditure["economic_ratio"],
  by = list(club_name = transfer_expenditure$club_name),
  FUN = mean
)

# Rank clubs from the highest to the lowest mean ratio and round for display.
ratio_rank <- order(-Transfer_club_economical$economic_ratio)
Transfer_club_economical <- Transfer_club_economical[ratio_rank, ]
rm(ratio_rank)
Transfer_club_economical$economic_ratio <- round(Transfer_club_economical$economic_ratio, 2)
Transfer_club_economical
## club_name economic_ratio
## 6 Manchester City 1309.17
## 9 Real Madrid 1283.66
## 2 FC Barcelona 1241.91
## 5 Liverpool 1109.28
## 4 Inter 1039.29
## 7 Manchester United 999.12
## 1 Chelsea 974.57
## 10 Tottenham Hotspur 819.82
## 8 Paris Saint-Germain 813.44
## 3 FC Bayern München 751.59
# Interactive bar chart of the mean economic ratio per club.
# ggplot2 orders a character x-axis alphabetically and ignores the row order
# of the data frame, so club_name is turned into a factor ordered by
# descending economic_ratio (on a plot-local copy) to show the ranking.
p <- ggplot(
  transform(
    Transfer_club_economical,
    club_name = reorder(club_name, -economic_ratio)
  ),
  aes(x = club_name, y = economic_ratio)
) +
  geom_bar(stat = "identity", width = 0.8, fill = "#0066CC") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Clubs with the Top Economic Ratio in last 5 years")
ggplotly(p)
From the newly plotted economic-ratio bar plot, we can see that Manchester City, and not Real Madrid, are the most economical club in recent years.
# Compare the three clubs with the highest mean economic ratio, year by year.
man_city <- filter(
  transfer_expenditure,
  club_name %in% c("Manchester City", "Real Madrid", "FC Barcelona")
)
# man_city has one row per player, so several rows share the same
# (joined, club_name) pair. Drawing them with stat = "identity" stacks the
# bars on top of each other at the same dodge position and only the tallest
# one stays visible. Summarising with the mean gives one bar per club and
# year, matching the per-club mean used in Transfer_club_economical.
ggplot(man_city, aes(fill = club_name, y = economic_ratio, x = joined)) +
  geom_bar(position = "dodge", stat = "summary", fun = "mean")
This plot gives us a better view and shows that Manchester City's spending has been declining in recent years, whereas Real Madrid's spending has gone up and down: it was low in 2017, followed by heavy spending in 2018 and 2019. FC Barcelona have spent consistently in every year and are more likely to be active in the market.
# Load the FIFA 17 snapshot and keep only the name and the two rating columns.
dataset2 <- read.csv("players_17.csv")
dataset2 <- dataset2[, c("long_name", "potential", "overall")]
# Same three columns from the current (FIFA 21) data.
dataset1 <- dataset[, c("long_name", "potential", "overall")]

# Inner join on the player's full name; ".x" columns come from dataset1
# (FIFA 21) and ".y" columns from dataset2 (FIFA 17).
# NOTE(review): long_name is presumably not guaranteed unique; duplicated
# names would multiply rows in the join. Confirm, or join on an id instead.
newdataset <- merge(dataset1, dataset2, by = "long_name")

# Rating changes between the two snapshots.
newdataset$overalldiff <- with(newdataset, overall.x - overall.y)
newdataset$potentialdiff <- with(newdataset, potential.x - potential.y)
# Current overall measured against the potential predicted in FIFA 17.
newdataset$overall_goal <- with(newdataset, overall.x - potential.y)
head(newdataset)
## long_name potential.x overall.x potential.y overall.y
## 1 Ögmundur Kristinsson 64 64 65 63
## 2 Ömer Ali Şahiner 74 74 78 74
## 3 Ömer Bayram 72 72 70 68
## 4 Ömer Hasan Şişmanoğlu 69 69 75 74
## 5 Ömer Toprak 76 76 85 84
## 6 喜田 拓也 73 69 74 66
## overalldiff potentialdiff overall_goal
## 1 1 -1 -1
## 2 0 -4 -4
## 3 4 2 2
## 4 -5 -6 -6
## 5 -8 -9 -9
## 6 3 -1 -5
# Keep the join key plus the three derived difference columns, then give the
# difference columns their final names in one step.
newdataset <- newdataset[, c("long_name", "overalldiff", "potentialdiff", "overall_goal")]
names(newdataset) <- c("long_name", "improv_overall", "improv_potential", "target_overall")
head(newdataset)
## long_name improv_overall improv_potential target_overall
## 1 Ögmundur Kristinsson 1 -1 -1
## 2 Ömer Ali Şahiner 0 -4 -4
## 3 Ömer Bayram 4 2 2
## 4 Ömer Hasan Şişmanoğlu -5 -6 -6
## 5 Ömer Toprak -8 -9 -9
## 6 喜田 拓也 3 -1 -5
# Attach the improvement columns to the full player table (inner join on the
# player's full name), then discard the join key.
dataset <- merge(dataset, newdataset, by = "long_name")
dataset$long_name <- NULL
In the above table, three new columns have been included: improv_overall and improv_potential are the improvement in a player's overall rating and potential over the last 5 years, and target_overall is the difference between the player's overall rating now and their potential 5 years ago.
# Sort players from the highest to the lowest overall rating.
dataset <- dataset[order(-dataset$overall), ]
# Drop physical / contract descriptors that are not used in the modelling.
dataset <- subset(
  dataset,
  select = -c(
    height_cm, weight_kg, preferred_foot, international_reputation, weak_foot,
    skill_moves, loaned_from, joined, player_traits
  )
)
# Remove the 26 per-position rating columns (values such as "89+3") by name.
# These are the columns sitting between goalkeeping_reflexes and
# improv_overall; selecting them by name instead of by the fixed range 65:90
# keeps the result correct if the number of preceding columns changes.
fifa21 <- dataset[, !(names(dataset) %in% c(
  "ls", "st", "rs", "lw", "lf", "cf", "rf", "rw",
  "lam", "cam", "ram", "lm", "lcm", "cm", "rcm", "rm",
  "lwb", "ldm", "cdm", "rdm", "rwb", "lb", "lcb", "cb", "rcb", "rb"
)), drop = FALSE]
head(fifa21)
## sofifa_id short_name age nationality club_name
## 4998 158023 L. Messi 33 Argentina FC Barcelona
## 1727 20801 Cristiano Ronaldo 35 Portugal Juventus
## 3806 200389 J. Oblak 27 Slovenia Atlético Madrid
## 4690 192985 K. De Bruyne 29 Belgium Manchester City
## 6197 190871 Neymar Jr 28 Brazil Paris Saint-Germain
## 6986 188545 R. Lewandowski 31 Poland FC Bayern München
## league_name league_rank overall potential value_eur wage_eur
## 4998 Spain Primera Division 1 93 93 67500000 560000
## 1727 Italian Serie A 1 92 92 46000000 220000
## 3806 Spain Primera Division 1 91 93 75000000 125000
## 4690 English Premier League 1 91 91 87000000 370000
## 6197 French Ligue 1 1 91 91 90000000 270000
## 6986 German 1. Bundesliga 1 91 91 80000000 240000
## player_positions work_rate release_clause_eur
## 4998 RW, ST, CF Medium/Low 138400000
## 1727 ST, LW High/Low 75900000
## 3806 GK Medium/Medium 159400000
## 4690 CAM, CM High/High 161000000
## 6197 LW, CAM High/Medium 166500000
## 6986 ST High/Medium 132000000
## player_tags
## 4998 #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 1727 #Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward
## 3806
## 4690 #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
## 6197 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 6986 #Distance Shooter, #Clinical Finisher
## team_position contract_valid_until nation_position pace shooting passing
## 4998 CAM 2021 RW 85 92 91
## 1727 LS 2022 LS 89 93 81
## 3806 GK 2023 GK NA NA NA
## 4690 RCM 2023 RCM 76 86 93
## 6197 LW 2022 91 85 86
## 6986 ST 2023 78 91 78
## dribbling defending physic gk_diving gk_handling gk_kicking gk_reflexes
## 4998 95 38 65 NA NA NA NA
## 1727 89 35 77 NA NA NA NA
## 3806 NA NA NA 87 92 78 90
## 4690 88 64 78 NA NA NA NA
## 6197 94 36 59 NA NA NA NA
## 6986 85 43 82 NA NA NA NA
## gk_speed gk_positioning attacking_crossing attacking_finishing
## 4998 NA NA 85 95
## 1727 NA NA 84 95
## 3806 52 90 13 11
## 4690 NA NA 94 82
## 6197 NA NA 85 87
## 6986 NA NA 71 94
## attacking_heading_accuracy attacking_short_passing attacking_volleys
## 4998 70 91 88
## 1727 90 82 86
## 3806 15 43 13
## 4690 55 94 82
## 6197 62 87 87
## 6986 85 84 89
## skill_dribbling skill_curve skill_fk_accuracy skill_long_passing
## 4998 96 93 94 91
## 1727 88 81 76 77
## 3806 12 13 14 40
## 4690 88 85 83 93
## 6197 95 88 89 81
## 6986 85 79 85 70
## skill_ball_control movement_acceleration movement_sprint_speed
## 4998 96 91 80
## 1727 92 87 91
## 3806 30 43 60
## 4690 92 77 76
## 6197 95 94 89
## 6986 88 77 78
## movement_agility movement_reactions movement_balance power_shot_power
## 4998 91 94 95 86
## 1727 87 95 71 94
## 3806 67 88 49 59
## 4690 78 91 76 91
## 6197 96 91 83 80
## 6986 77 93 82 89
## power_jumping power_stamina power_strength power_long_shots
## 4998 68 72 69 94
## 1727 95 84 78 93
## 3806 78 41 78 12
## 4690 63 89 74 91
## 6197 62 81 50 84
## 6986 84 76 86 85
## mentality_aggression mentality_interceptions mentality_positioning
## 4998 44 40 93
## 1727 63 29 95
## 3806 34 19 11
## 4690 76 66 88
## 6197 51 36 87
## 6986 81 49 94
## mentality_vision mentality_penalties mentality_composure defending_marking
## 4998 95 75 96 NA
## 1727 82 84 95 NA
## 3806 65 11 68 NA
## 4690 94 84 91 NA
## 6197 90 92 93 NA
## 6986 79 88 88 NA
## defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 4998 35 24 6
## 1727 32 24 7
## 3806 12 18 87
## 4690 65 53 15
## 6197 30 29 9
## 6986 42 19 15
## goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 4998 11 15 14
## 1727 11 15 14
## 3806 92 78 90
## 4690 13 5 10
## 6197 9 15 15
## 6986 6 12 8
## goalkeeping_reflexes improv_overall improv_potential target_overall
## 4998 8 0 0 0
## 1727 11 -2 -2 -2
## 3806 90 4 2 0
## 4690 13 3 0 0
## 6197 11 -1 -4 -4
## 6986 10 1 1 1
This question is addressed by predicting players' market values based on their strengths, so that clubs can estimate how much they need to spend and spend it on the right player.
positions <- factor(fifa21$team_position)
nlevels(positions)
## [1] 30
levels(positions)
## [1] "" "CAM" "CB" "CDM" "CF" "CM" "GK" "LAM" "LB" "LCB" "LCM" "LDM"
## [13] "LF" "LM" "LS" "LW" "LWB" "RAM" "RB" "RCB" "RCM" "RDM" "RES" "RF"
## [25] "RM" "RS" "RW" "RWB" "ST" "SUB"
# Load project helpers; position_filter() is expected to come from fun.R
# (TODO confirm — the file is not shown here).
source("fun.R")
# NOTE(review): position_filter() presumably replaces the non-positional
# team_position values ("", "RES", "SUB") with a position derived from
# player_positions — the level count drops from 30 to 27 after this call.
# Confirm against fun.R.
fifa21$team_position <- position_filter(fifa21$team_position, fifa21$player_positions)
# Re-derive the factor to inspect the remaining distinct positions.
positions <- factor(fifa21$team_position)
nlevels(positions)
## [1] 27
levels(positions)
## [1] "CAM" "CB" "CDM" "CF" "CM" "GK" "LAM" "LB" "LCB" "LCM" "LDM" "LF"
## [13] "LM" "LS" "LW" "LWB" "RAM" "RB" "RCB" "RCM" "RDM" "RF" "RM" "RS"
## [25] "RW" "RWB" "ST"
# Collapse left/right/centre variants of each role into one label per role.
# Attacking, defensive and central midfield:
fifa21$team_position[fifa21$team_position %in% c("CAM", "LAM", "RAM")] <- "AMF"
fifa21$team_position[fifa21$team_position %in% c("CDM", "LDM", "RDM")] <- "DMF"
fifa21$team_position[fifa21$team_position %in% c("CM", "LCM", "RCM")] <- "CMF"
# Centre-backs are labelled "FB" in this analysis:
fifa21$team_position[fifa21$team_position %in% c("CB", "LCB", "RCB")] <- "FB"
# Forwards and second strikers:
fifa21$team_position[fifa21$team_position %in% c("CF", "LF", "RF")] <- "FW"
fifa21$team_position[fifa21$team_position %in% c("LS", "RS")] <- "SS"
# Wing-backs are merged into the matching full-back side:
fifa21$team_position[fifa21$team_position %in% "LWB"] <- "LB"
fifa21$team_position[fifa21$team_position %in% "RWB"] <- "RB"
# Re-derive the factor to check how many position groups remain.
positions <- factor(fifa21$team_position)
nlevels(positions)
## [1] 14
levels(positions)
## [1] "AMF" "CMF" "DMF" "FB" "FW" "GK" "LB" "LM" "LW" "RB" "RM" "RW"
## [13] "SS" "ST"
# Modelling sample: players whose overall rating improved. It is taken BEFORE
# the columns below are dropped, so samplefifa21 keeps the full column layout.
samplefifa21 <- fifa21[fifa21$improv_overall > 0, ]

# Remove descriptive columns that are not needed for the correlation study.
fifa21[c("league_rank", "player_positions", "player_tags", "nation_position")] <- NULL

# Logical mask of the numeric columns (named by column).
nums <- vapply(fifa21, is.numeric, logical(1))

# Correlation input: numeric columns only, minus the id and contract year,
# which are numeric but carry no rating information.
corr_set <- fifa21[, nums]
corr_set <- corr_set[, setdiff(names(corr_set), c("sofifa_id", "contract_valid_until"))]
summary(corr_set)
## age overall potential value_eur
## Min. :17.00 Min. :49.00 Min. :52.00 Min. : 0
## 1st Qu.:25.00 1st Qu.:66.00 1st Qu.:67.00 1st Qu.: 525000
## Median :28.00 Median :69.00 Median :71.00 Median : 1100000
## Mean :27.82 Mean :69.76 Mean :71.81 Mean : 3719386
## 3rd Qu.:30.00 3rd Qu.:73.00 3rd Qu.:75.00 3rd Qu.: 4000000
## Max. :43.00 Max. :93.00 Max. :95.00 Max. :105500000
##
## wage_eur release_clause_eur pace shooting
## Min. : 0 Min. : 13000 Min. :28.00 Min. :16.00
## 1st Qu.: 2000 1st Qu.: 878000 1st Qu.:60.00 1st Qu.:46.00
## Median : 6000 Median : 2000000 Median :69.00 Median :59.00
## Mean : 15193 Mean : 7080571 Mean :67.38 Mean :56.26
## 3rd Qu.: 17000 3rd Qu.: 7600000 3rd Qu.:76.00 3rd Qu.:67.00
## Max. :560000 Max. :203100000 Max. :96.00 Max. :93.00
## NA's :379 NA's :940 NA's :940
## passing dribbling defending physic
## Min. :25.00 Min. :28.00 Min. :17.00 Min. :35.00
## 1st Qu.:56.00 1st Qu.:61.00 1st Qu.:41.00 1st Qu.:64.00
## Median :62.00 Median :67.00 Median :61.00 Median :70.00
## Mean :61.72 Mean :65.79 Mean :56.01 Mean :68.61
## 3rd Qu.:68.00 3rd Qu.:72.00 3rd Qu.:68.00 3rd Qu.:74.00
## Max. :93.00 Max. :95.00 Max. :91.00 Max. :91.00
## NA's :940 NA's :940 NA's :940 NA's :940
## gk_diving gk_handling gk_kicking gk_reflexes gk_speed
## Min. :48.00 Min. :43.00 Min. :40.00 Min. :48.0 Min. :12.00
## 1st Qu.:65.00 1st Qu.:63.00 1st Qu.:61.00 1st Qu.:66.0 1st Qu.:36.00
## Median :69.00 Median :67.00 Median :65.00 Median :70.0 Median :43.00
## Mean :69.71 Mean :67.02 Mean :65.73 Mean :70.7 Mean :42.36
## 3rd Qu.:74.00 3rd Qu.:71.00 3rd Qu.:70.00 3rd Qu.:75.0 3rd Qu.:48.00
## Max. :90.00 Max. :92.00 Max. :93.00 Max. :90.0 Max. :65.00
## NA's :7397 NA's :7397 NA's :7397 NA's :7397 NA's :7397
## gk_positioning attacking_crossing attacking_finishing
## Min. :46.00 Min. : 6.00 Min. : 5.00
## 1st Qu.:64.00 1st Qu.:44.00 1st Qu.:33.00
## Median :68.00 Median :59.00 Median :53.00
## Mean :68.33 Mean :53.85 Mean :48.74
## 3rd Qu.:73.00 3rd Qu.:68.00 3rd Qu.:65.00
## Max. :91.00 Max. :94.00 Max. :95.00
## NA's :7397
## attacking_heading_accuracy attacking_short_passing attacking_volleys
## Min. : 5.00 Min. :11.00 Min. : 5.00
## 1st Qu.:48.00 1st Qu.:59.00 1st Qu.:32.00
## Median :59.00 Median :66.00 Median :49.00
## Mean :55.37 Mean :62.62 Mean :46.52
## 3rd Qu.:68.00 3rd Qu.:72.00 3rd Qu.:61.00
## Max. :93.00 Max. :94.00 Max. :90.00
##
## skill_dribbling skill_curve skill_fk_accuracy skill_long_passing
## Min. : 5.00 Min. : 6.00 Min. : 6.00 Min. : 9.00
## 1st Qu.:53.00 1st Qu.:38.00 1st Qu.:33.00 1st Qu.:51.00
## Median :64.00 Median :56.00 Median :48.00 Median :61.00
## Mean :58.62 Mean :51.75 Mean :46.76 Mean :57.35
## 3rd Qu.:71.00 3rd Qu.:67.00 3rd Qu.:62.00 3rd Qu.:67.00
## Max. :96.00 Max. :94.00 Max. :94.00 Max. :93.00
##
## skill_ball_control movement_acceleration movement_sprint_speed
## Min. : 5.00 Min. :13.00 Min. :12.00
## 1st Qu.:59.00 1st Qu.:55.00 1st Qu.:55.00
## Median :66.00 Median :67.00 Median :67.00
## Mean :62.14 Mean :64.48 Mean :64.61
## 3rd Qu.:72.00 3rd Qu.:75.00 3rd Qu.:75.00
## Max. :96.00 Max. :97.00 Max. :96.00
##
## movement_agility movement_reactions movement_balance power_shot_power
## Min. :14.00 Min. :31.00 Min. :18.0 Min. :18.0
## 1st Qu.:58.00 1st Qu.:61.00 1st Qu.:56.0 1st Qu.:53.0
## Median :68.00 Median :66.00 Median :66.0 Median :64.0
## Mean :65.43 Mean :66.44 Mean :64.4 Mean :62.4
## 3rd Qu.:75.00 3rd Qu.:71.00 3rd Qu.:74.0 3rd Qu.:72.0
## Max. :96.00 Max. :95.00 Max. :97.0 Max. :95.0
##
## power_jumping power_stamina power_strength power_long_shots
## Min. :15.00 Min. :14.00 Min. :24.00 Min. : 4.00
## 1st Qu.:61.00 1st Qu.:61.00 1st Qu.:62.00 1st Qu.:38.00
## Median :69.00 Median :70.00 Median :70.00 Median :57.00
## Mean :67.54 Mean :66.14 Mean :68.58 Mean :51.42
## 3rd Qu.:76.00 3rd Qu.:76.00 3rd Qu.:76.00 3rd Qu.:66.00
## Max. :95.00 Max. :97.00 Max. :97.00 Max. :94.00
##
## mentality_aggression mentality_interceptions mentality_positioning
## Min. :10.00 Min. : 6.00 Min. : 3.00
## 1st Qu.:50.00 1st Qu.:32.00 1st Qu.:43.00
## Median :65.00 Median :59.00 Median :60.00
## Mean :60.55 Mean :51.34 Mean :53.67
## 3rd Qu.:73.00 3rd Qu.:68.00 3rd Qu.:68.00
## Max. :95.00 Max. :91.00 Max. :95.00
##
## mentality_vision mentality_penalties mentality_composure
## Min. :10.00 Min. : 7.00 Min. :12.00
## 1st Qu.:49.00 1st Qu.:40.00 1st Qu.:58.00
## Median :60.00 Median :53.00 Median :65.00
## Mean :57.93 Mean :51.01 Mean :63.68
## 3rd Qu.:68.00 3rd Qu.:64.00 3rd Qu.:70.00
## Max. :95.00 Max. :92.00 Max. :96.00
##
## defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## Min. : 7.00 Min. : 6.00 Min. : 1.00
## 1st Qu.:31.00 1st Qu.:27.00 1st Qu.: 8.00
## Median :61.00 Median :58.00 Median :11.00
## Mean :51.39 Mean :48.92 Mean :17.39
## 3rd Qu.:69.00 3rd Qu.:67.00 3rd Qu.:14.00
## Max. :93.00 Max. :90.00 Max. :90.00
##
## goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.: 9.00 1st Qu.: 9.00 1st Qu.: 8.00
## Median :11.00 Median :11.00 Median :11.00
## Mean :17.16 Mean :17.03 Mean :17.27
## 3rd Qu.:14.00 3rd Qu.:14.00 3rd Qu.:14.00
## Max. :92.00 Max. :93.00 Max. :91.00
##
## goalkeeping_reflexes improv_overall improv_potential target_overall
## Min. : 1.0 Min. :-24.000 Min. :-19.000 Min. :-26.000
## 1st Qu.: 8.0 1st Qu.: -2.000 1st Qu.: -5.000 1st Qu.: -7.000
## Median :11.0 Median : 1.000 Median : -2.000 Median : -3.000
## Mean :17.5 Mean : 1.846 Mean : -1.385 Mean : -3.434
## 3rd Qu.:14.0 3rd Qu.: 5.000 3rd Qu.: 1.000 3rd Qu.: 0.000
## Max. :90.0 Max. : 29.000 Max. : 23.000 Max. : 23.000
##
# Goalkeeper subset: rows that have a gk_diving value, then only the columns
# that are fully populated for those rows.
corr_set1 <- corr_set[!is.na(corr_set$gk_diving), ]
corr_set1 <- corr_set1[, !vapply(corr_set1, anyNA, logical(1))]

# Outfield subset: rows that have a pace value, same column rule.
corr_set2 <- corr_set[!is.na(corr_set$pace), ]
corr_set2 <- corr_set2[, !vapply(corr_set2, anyNA, logical(1))]

# Pairwise correlations, rounded to two decimals.
cormat1 <- round(cor(corr_set1), 2)
cormat2 <- round(cor(corr_set2), 2)

# Long format (Var1, Var2, value) for the goalkeeper heat map.
mcorr_set1 <- melt(cormat1)

# Small base font so the many attribute labels fit; this changes the
# session-wide default theme.
theme_set(theme_gray(base_size = 5))
g <- ggplot(data = mcorr_set1, aes(x = Var1, y = Var2, fill = value)) +
  geom_raster() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(low = "blue", high = "red", guide = "colorbar")
ggplotly(g)
From the above matrix, we can see the critical attributes for goalkeepers, and these factors can be used to predict the market values of players.
# Long format (Var1, Var2, value) of the outfield-player correlation matrix.
mcorr_set2 <- melt(cormat2)

# Small base font so the many attribute labels fit; this changes the
# session-wide default theme.
theme_set(theme_gray(base_size = 5))

# Heat map: blue for negative, red for positive correlation.
g <- ggplot(data = mcorr_set2, aes(x = Var1, y = Var2, fill = value)) +
  geom_raster() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(low = "blue", high = "red", guide = "colorbar")
ggplotly(g)
From the above matrix, we can see the attributes affecting the non-goalkeepers, including the positively correlated factors for strikers, defenders and midfielders.
# Goalkeepers in the modelling sample. %in% is used instead of == because it
# never returns NA, so a missing team_position cannot add an all-NA row.
goalkeeper <- samplefifa21[samplefifa21$team_position %in% "GK", ]
dim(goalkeeper)
## [1] 570 67
# Defenders in the modelling sample: centre-backs ("FB" in this analysis)
# plus left and right backs. %in% is used instead of chained == comparisons
# because it never returns NA, so a missing team_position cannot add an
# all-NA row.
defenders <- samplefifa21[samplefifa21$team_position %in% c("FB", "LB", "RB"), ]
dim(defenders)
## [1] 1585 67
# Midfielders in the modelling sample: central, defensive and attacking
# midfield plus the right and left midfield positions. %in% is used instead of
# chained == comparisons because it never returns NA, so a missing
# team_position cannot add an all-NA row.
midfielders <- samplefifa21[
  samplefifa21$team_position %in% c("CMF", "DMF", "AMF", "RM", "LM"),
]
dim(midfielders)
## [1] 1618 67
# Forwards in the modelling sample: forwards, both wingers, second strikers
# and strikers. %in% is used instead of chained == comparisons because it
# never returns NA, so a missing team_position cannot add an all-NA row.
forwards <- samplefifa21[
  samplefifa21$team_position %in% c("FW", "LW", "RW", "SS", "ST"),
]
dim(forwards)
## [1] 770 67
# 80/20 train/validation split of the goalkeeper sample.
# A fixed seed makes the split, and every number derived from it, reproducible.
set.seed(2021)
# seq_len() stays empty for a zero-row table, where seq(1, 0) would count down.
train.index <- sample(seq_len(nrow(goalkeeper)), floor(0.8 * nrow(goalkeeper)))
# Model columns selected by name rather than by position, so the selection
# does not silently shift if the column layout changes.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "gk_diving", "gk_handling", "gk_kicking", "gk_reflexes", "gk_speed",
  "gk_positioning", "movement_reactions", "power_shot_power"
)
train.t <- goalkeeper[train.index, selected.var]
# setdiff() instead of a negative index: x[-integer(0), ] selects no rows at
# all, which would leave the validation set empty if train.index were empty.
valid.t <- goalkeeper[setdiff(seq_len(nrow(goalkeeper)), train.index), selected.var]
head(valid.t)
## short_name age overall potential value_eur wage_eur gk_diving
## 2364 Ederson 26 88 91 53500000 195000 86
## 4717 K. Navas 33 87 87 27000000 110000 90
## 8245 Y. Sommer 31 86 86 28500000 55000 80
## 7086 R. Bürki 29 84 85 25000000 67000 85
## 5200 L. Hrádecký 30 83 83 17500000 63000 85
## 6209 N. Pope 28 82 83 18500000 59000 79
## gk_handling gk_kicking gk_reflexes gk_speed gk_positioning
## 2364 82 93 88 63 86
## 4717 81 75 90 53 82
## 8245 86 85 85 51 87
## 7086 82 72 88 47 82
## 5200 78 69 87 41 83
## 6209 81 75 84 48 83
## movement_reactions power_shot_power
## 2364 87 70
## 4717 84 56
## 8245 84 64
## 7086 82 54
## 5200 82 52
## 6209 75 56
# Linear model of goalkeeper market value, fitted on the training split.
# In an R formula `a * b` expands to both main effects plus their interaction,
# so the last term contributes every two- and three-way interaction of
# gk_handling, gk_kicking and power_shot_power.
gklm <- lm(
  value_eur ~ overall * age + gk_reflexes * movement_reactions + wage_eur +
    gk_diving * gk_positioning + potential + gk_speed +
    gk_handling * gk_kicking * power_shot_power,
  data = train.t
)
summary(gklm)
##
## Call:
## lm(formula = value_eur ~ overall * age + gk_reflexes * movement_reactions +
## wage_eur + gk_diving * gk_positioning + potential + gk_speed +
## gk_handling * gk_kicking * power_shot_power, data = train.t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9531534 -775268 29726 774259 25162412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.736e+08 6.170e+07 -7.676 1.08e-13
## overall 3.141e+06 3.401e+05 9.236 < 2e-16
## age 4.557e+06 3.419e+05 13.328 < 2e-16
## gk_reflexes -1.313e+06 1.806e+05 -7.272 1.66e-12
## movement_reactions -1.352e+06 1.818e+05 -7.438 5.49e-13
## wage_eur 6.546e+01 8.989e+00 7.282 1.55e-12
## gk_diving -2.462e+06 2.534e+05 -9.717 < 2e-16
## gk_positioning -2.436e+06 2.599e+05 -9.373 < 2e-16
## potential 1.557e+05 5.862e+04 2.655 0.00821
## gk_speed -6.468e+03 1.191e+04 -0.543 0.58738
## gk_handling 7.299e+06 1.036e+06 7.047 7.18e-12
## gk_kicking 1.595e+07 3.453e+06 4.620 5.05e-06
## power_shot_power 2.946e+06 4.237e+06 0.695 0.48730
## overall:age -6.675e+04 4.729e+03 -14.115 < 2e-16
## gk_reflexes:movement_reactions 1.778e+04 2.534e+03 7.018 8.66e-12
## gk_diving:gk_positioning 3.190e+04 3.605e+03 8.850 < 2e-16
## gk_handling:gk_kicking -2.249e+05 5.119e+04 -4.394 1.40e-05
## gk_handling:power_shot_power -3.206e+04 6.229e+04 -0.515 0.60702
## gk_kicking:power_shot_power -1.975e+05 1.899e+04 -10.403 < 2e-16
## gk_handling:gk_kicking:power_shot_power 2.702e+03 2.781e+02 9.714 < 2e-16
##
## (Intercept) ***
## overall ***
## age ***
## gk_reflexes ***
## movement_reactions ***
## wage_eur ***
## gk_diving ***
## gk_positioning ***
## potential **
## gk_speed
## gk_handling ***
## gk_kicking ***
## power_shot_power
## overall:age ***
## gk_reflexes:movement_reactions ***
## gk_diving:gk_positioning ***
## gk_handling:gk_kicking ***
## gk_handling:power_shot_power
## gk_kicking:power_shot_power ***
## gk_handling:gk_kicking:power_shot_power ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2120000 on 436 degrees of freedom
## Multiple R-squared: 0.9225, Adjusted R-squared: 0.9192
## F-statistic: 273.3 on 19 and 436 DF, p-value: < 2.2e-16
# Predict market values for the held-out goalkeepers and compare them with
# the actual values (predictions first, observed values second).
gklm.pred <- predict(gklm, newdata = valid.t)
accuracy(gklm.pred, valid.t[["value_eur"]])
## ME RMSE MAE MPE MAPE
## Test set 80304.58 1873938 1235118 -70.33823 361.9268
goalkeeper[goalkeeper$overall == 85 & goalkeeper$age == 30, c("short_name","value_eur")]
## short_name value_eur
## 6464 P. Gulácsi 26000000
# Predicted market value for a single goalkeeper profile entered by hand
# (overall 85, age 30).
predict(
  gklm,
  data.frame(
    overall = 85, age = 30, wage_eur = 65000, gk_diving = 84,
    potential = 85, gk_handling = 85, gk_kicking = 82, gk_reflexes = 86,
    gk_speed = 43, gk_positioning = 84, movement_reactions = 82,
    power_shot_power = 62
  )
)
## 1
## 29996853
In the above model, P. Gulácsi's market value is predicted.
# 80/20 train/validation split of the defender sample.
# A fixed seed makes the split, and every number derived from it, reproducible.
set.seed(2021)
# seq_len() stays empty for a zero-row table, where seq(1, 0) would count down.
train.index <- sample(seq_len(nrow(defenders)), floor(0.8 * nrow(defenders)))
# Model columns selected by name rather than by position, so the selection
# does not silently shift if the column layout changes.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "defending", "physic", "mentality_aggression", "mentality_interceptions",
  "defending_standing_tackle", "defending_sliding_tackle"
)
train.t <- defenders[train.index, selected.var]
# setdiff() instead of a negative index: x[-integer(0), ] selects no rows at
# all, which would leave the validation set empty if train.index were empty.
valid.t <- defenders[setdiff(seq_len(nrow(defenders)), train.index), selected.var]
head(valid.t)
## short_name age overall potential value_eur wage_eur defending
## 6920 Ricardo Pereira 26 85 87 40500000 120000 81
## 2161 D. Djené 28 82 83 21500000 39000 84
## 4459 Josué Chiamulera 28 82 82 19500000 41000 85
## 5092 L. Hernández 24 82 87 26500000 70000 83
## 369 Adryan Zonta 28 81 81 15000000 500 72
## 4486 Juiano Mestres 24 81 81 0 0 82
## physic mentality_aggression mentality_interceptions
## 6920 76 79 81
## 2161 80 88 85
## 4459 79 88 84
## 5092 79 86 83
## 369 77 69 74
## 4486 80 86 82
## defending_standing_tackle defending_sliding_tackle
## 6920 84 83
## 2161 86 83
## 4459 87 82
## 5092 84 86
## 369 72 69
## 4486 85 84
# Linear model of defender market value, fitted on the training split.
# In an R formula `a * b` expands to both main effects plus their interaction,
# so the last term contributes every two- and three-way interaction of
# mentality_interceptions, mentality_aggression and physic.
dflm <- lm(
  value_eur ~ overall + age + wage_eur * potential +
    defending * defending_sliding_tackle + defending_standing_tackle +
    mentality_interceptions * mentality_aggression * physic,
  data = train.t
)
summary(dflm)
##
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur * potential +
## defending * defending_sliding_tackle + defending_standing_tackle +
## mentality_interceptions * mentality_aggression * physic,
## data = train.t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15425254 -906135 -182308 742276 19891571
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -1.140e+08 2.655e+07
## overall 6.875e+05 3.545e+04
## age -2.762e+05 2.971e+04
## wage_eur -1.444e+03 4.192e+01
## potential -1.951e+05 3.057e+04
## defending -5.358e+05 8.929e+04
## defending_sliding_tackle -5.080e+05 7.384e+04
## defending_standing_tackle 6.335e+04 3.161e+04
## mentality_interceptions 1.846e+06 4.196e+05
## mentality_aggression 2.256e+06 3.938e+05
## physic 1.810e+06 4.009e+05
## wage_eur:potential 1.828e+01 4.916e-01
## defending:defending_sliding_tackle 7.760e+03 1.111e+03
## mentality_interceptions:mentality_aggression -3.374e+04 5.957e+03
## mentality_interceptions:physic -2.737e+04 6.178e+03
## mentality_aggression:physic -3.324e+04 5.656e+03
## mentality_interceptions:mentality_aggression:physic 4.960e+02 8.475e+01
## t value Pr(>|t|)
## (Intercept) -4.296 1.88e-05 ***
## overall 19.393 < 2e-16 ***
## age -9.298 < 2e-16 ***
## wage_eur -34.460 < 2e-16 ***
## potential -6.384 2.43e-10 ***
## defending -6.001 2.56e-09 ***
## defending_sliding_tackle -6.879 9.50e-12 ***
## defending_standing_tackle 2.004 0.0453 *
## mentality_interceptions 4.399 1.18e-05 ***
## mentality_aggression 5.729 1.27e-08 ***
## physic 4.515 6.93e-06 ***
## wage_eur:potential 37.190 < 2e-16 ***
## defending:defending_sliding_tackle 6.984 4.65e-12 ***
## mentality_interceptions:mentality_aggression -5.664 1.84e-08 ***
## mentality_interceptions:physic -4.430 1.02e-05 ***
## mentality_aggression:physic -5.877 5.34e-09 ***
## mentality_interceptions:mentality_aggression:physic 5.852 6.18e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2053000 on 1251 degrees of freedom
## Multiple R-squared: 0.9163, Adjusted R-squared: 0.9152
## F-statistic: 855.8 on 16 and 1251 DF, p-value: < 2.2e-16
# Predict market values for the held-out defenders and compare them with the
# actual values (predictions first, observed values second).
# NOTE(review): the validation set can contain players with value_eur == 0;
# the percentage-based measures (MPE, MAPE) presumably divide by the actual
# value and therefore come out as Inf/-Inf for such rows — confirm.
dflm.pred <- predict(dflm, newdata = valid.t)
accuracy(dflm.pred, valid.t[["value_eur"]])
## ME RMSE MAE MPE MAPE
## Test set -103471.1 1870189 1184071 -Inf Inf
defenders[defenders$overall == 86 & defenders$age == 27, c("short_name","value_eur")]
## short_name value_eur
## 6841 R. Varane 46500000
# Predicted market value for a single defender profile entered by hand
# (overall 86, age 27).
predict(
  dflm,
  data.frame(
    overall = 86, age = 27, wage_eur = 230000, potential = 86,
    defending = 82, defending_standing_tackle = 83,
    defending_sliding_tackle = 85, mentality_aggression = 82,
    mentality_interceptions = 83, physic = 80
  )
)
## 1
## 46060782
In the above model, R. Varane's market value is predicted.
# 80/20 train/validation split of the midfielder sample.
# A fixed seed makes the split, and every number derived from it, reproducible.
set.seed(2021)
# seq_len() stays empty for a zero-row table, where seq(1, 0) would count down.
train.index <- sample(seq_len(nrow(midfielders)), floor(0.8 * nrow(midfielders)))
# Model columns selected by name rather than by position, so the selection
# does not silently shift if the column layout changes.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "passing", "dribbling", "attacking_short_passing", "skill_dribbling",
  "skill_ball_control", "power_long_shots", "mentality_positioning",
  "mentality_vision"
)
train.t <- midfielders[train.index, selected.var]
# setdiff() instead of a negative index: x[-integer(0), ] selects no rows at
# all, which would leave the validation set empty if train.index were empty.
valid.t <- midfielders[setdiff(seq_len(nrow(midfielders)), train.index), selected.var]
head(valid.t)
## short_name age overall potential value_eur wage_eur passing dribbling
## 4690 K. De Bruyne 29 91 91 87000000 370000 93 88
## 4448 J. Kimmich 25 88 90 65000000 145000 86 84
## 4242 J. Henderson 30 86 86 36500000 140000 84 79
## 1531 C. Eriksen 28 85 85 39000000 155000 88 81
## 3266 H. Ziyech 27 85 86 44000000 140000 87 84
## 7461 S. Gnabry 24 85 87 47500000 100000 78 86
## attacking_short_passing skill_dribbling skill_ball_control
## 4690 94 88 92
## 4448 87 83 85
## 4242 86 78 84
## 1531 90 77 90
## 3266 86 83 85
## 7461 79 87 83
## power_long_shots mentality_positioning mentality_vision
## 4690 91 88 94
## 4448 84 80 84
## 4242 75 78 83
## 1531 89 82 90
## 3266 77 82 89
## 7461 81 85 83
# Linear model of midfielder market value, fitted on the training split.
# In an R formula `a * b` expands to both main effects plus their interaction,
# so the last term contributes every two- and three-way interaction of
# mentality_positioning, mentality_vision and power_long_shots.
mflm <- lm(
  value_eur ~ overall + age + wage_eur + passing * attacking_short_passing +
    dribbling * skill_dribbling + skill_ball_control +
    mentality_positioning * mentality_vision * power_long_shots,
  data = train.t
)
summary(mflm)
##
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur + passing *
## attacking_short_passing + dribbling * skill_dribbling + skill_ball_control +
## mentality_positioning * mentality_vision * power_long_shots,
## data = train.t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10709161 -1054194 136739 1095472 21904568
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -4.952e+06 1.541e+07
## overall 5.280e+05 3.303e+04
## age -2.829e+05 2.384e+04
## wage_eur 1.347e+02 4.318e+00
## passing -1.204e+06 1.262e+05
## attacking_short_passing -1.076e+06 1.145e+05
## dribbling -8.530e+05 1.093e+05
## skill_dribbling -8.908e+05 9.868e+04
## skill_ball_control -6.981e+04 3.627e+04
## mentality_positioning 1.898e+06 2.226e+05
## mentality_vision 1.914e+06 2.143e+05
## power_long_shots 2.304e+06 2.105e+05
## passing:attacking_short_passing 1.661e+04 1.703e+03
## dribbling:skill_dribbling 1.280e+04 1.310e+03
## mentality_positioning:mentality_vision -3.040e+04 3.511e+03
## mentality_positioning:power_long_shots -3.682e+04 3.497e+03
## mentality_vision:power_long_shots -3.699e+04 3.409e+03
## mentality_positioning:mentality_vision:power_long_shots 5.862e+02 5.240e+01
## t value Pr(>|t|)
## (Intercept) -0.321 0.7480
## overall 15.985 < 2e-16 ***
## age -11.866 < 2e-16 ***
## wage_eur 31.206 < 2e-16 ***
## passing -9.545 < 2e-16 ***
## attacking_short_passing -9.394 < 2e-16 ***
## dribbling -7.805 1.23e-14 ***
## skill_dribbling -9.027 < 2e-16 ***
## skill_ball_control -1.925 0.0545 .
## mentality_positioning 8.526 < 2e-16 ***
## mentality_vision 8.934 < 2e-16 ***
## power_long_shots 10.947 < 2e-16 ***
## passing:attacking_short_passing 9.754 < 2e-16 ***
## dribbling:skill_dribbling 9.774 < 2e-16 ***
## mentality_positioning:mentality_vision -8.657 < 2e-16 ***
## mentality_positioning:power_long_shots -10.531 < 2e-16 ***
## mentality_vision:power_long_shots -10.852 < 2e-16 ***
## mentality_positioning:mentality_vision:power_long_shots 11.188 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2515000 on 1276 degrees of freedom
## Multiple R-squared: 0.9004, Adjusted R-squared: 0.8991
## F-statistic: 678.6 on 17 and 1276 DF, p-value: < 2.2e-16
# Predict market values for the held-out midfielders and compare them with
# the actual values (predictions first, observed values second).
# NOTE(review): an Inf MAPE / NaN MPE presumably comes from validation rows
# with value_eur == 0, since percentage errors divide by the actual value —
# confirm.
mflm.pred <- predict(mflm, newdata = valid.t)
accuracy(mflm.pred, valid.t[["value_eur"]])
## ME RMSE MAE MPE MAPE
## Test set 8733.147 2937471 1783696 NaN Inf
midfielders[midfielders$overall == 91 & midfielders$age == 29, c("short_name","value_eur")]
## short_name value_eur
## 4690 K. De Bruyne 87000000
# Predicted market value for K. De Bruyne's attribute profile (overall 91,
# age 29), entered by hand from his row in the midfielder data.
# skill_ball_control is 92 in that row; it was previously typed as 93, which
# shifted the prediction by one skill_ball_control coefficient.
# `potential` is not a term of mflm and is ignored by predict(); it is kept
# only so the profile stays complete.
predict(
  mflm,
  data.frame(
    overall = 91, age = 29, wage_eur = 370000, potential = 91,
    passing = 93, dribbling = 88, attacking_short_passing = 94,
    skill_dribbling = 88, skill_ball_control = 92, power_long_shots = 91,
    mentality_positioning = 88, mentality_vision = 94
  )
)
## 1
## 91200362
In the above model, K. De Bruyne's market value is predicted.
# 80/20 train/validation split of the forward sample.
# A fixed seed makes the split, and every number derived from it, reproducible.
set.seed(2021)
# seq_len() stays empty for a zero-row table, where seq(1, 0) would count down.
train.index <- sample(seq_len(nrow(forwards)), floor(0.8 * nrow(forwards)))
# Model columns selected by name rather than by position, so the selection
# does not silently shift if the column layout changes.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "pace", "shooting", "attacking_finishing", "attacking_volleys",
  "skill_curve", "movement_acceleration", "movement_sprint_speed",
  "power_shot_power", "attacking_crossing"
)
train.t <- forwards[train.index, selected.var]
# setdiff() instead of a negative index: x[-integer(0), ] selects no rows at
# all, which would leave the validation set empty if train.index were empty.
valid.t <- forwards[setdiff(seq_len(nrow(forwards)), train.index), selected.var]
head(valid.t)
## short_name age overall potential value_eur wage_eur pace shooting
## 4604 K. Benzema 32 89 89 53000000 350000 74 85
## 6588 P. Dybala 26 88 89 71000000 190000 85 85
## 2210 D. Mertens 33 85 85 24000000 115000 86 82
## 5841 M. Depay 26 85 88 48500000 115000 86 83
## 8220 W. Ben Yedder 29 84 84 31000000 91000 84 83
## 5077 L. Ocampos 25 82 83 26500000 40000 82 83
## attacking_finishing attacking_volleys skill_curve movement_acceleration
## 4604 88 86 81 77
## 6588 84 88 88 89
## 2210 84 70 82 90
## 5841 83 74 85 85
## 8220 88 84 80 86
## 5077 85 81 80 80
## movement_sprint_speed power_shot_power attacking_crossing
## 4604 72 84 75
## 6588 82 82 82
## 2210 82 80 79
## 5841 87 87 83
## 8220 82 83 74
## 5077 83 84 79
# Fit a linear model of forward market value on rating, age, wage and the
# shooting, crossing and pace attributes, with the interaction terms spelled
# out in the formula, then print the fit summary.
flm <- lm(
  value_eur ~ overall + age + wage_eur +
    shooting * attacking_finishing * power_shot_power +
    attacking_volleys +
    attacking_crossing * skill_curve +
    movement_acceleration + pace + movement_sprint_speed,
  data = train.t
)
summary(flm)
##
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur + shooting *
## attacking_finishing * power_shot_power + attacking_volleys +
## attacking_crossing * skill_curve + movement_acceleration +
## pace + movement_sprint_speed, data = train.t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19476071 -1470218 91121 1364766 48663504
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -3.477e+08 5.965e+07 -5.828
## overall 5.394e+05 7.368e+04 7.321
## age -3.549e+05 4.991e+04 -7.111
## wage_eur 1.538e+02 7.265e+00 21.166
## shooting 4.503e+06 1.165e+06 3.867
## attacking_finishing 5.213e+06 1.058e+06 4.926
## power_shot_power 6.781e+06 8.599e+05 7.886
## attacking_volleys 5.511e+03 3.027e+04 0.182
## attacking_crossing -3.642e+05 5.139e+04 -7.088
## skill_curve -3.156e+05 5.053e+04 -6.247
## movement_acceleration -2.006e+05 2.272e+05 -0.883
## pace 5.203e+05 5.032e+05 1.034
## movement_sprint_speed -2.855e+05 2.800e+05 -1.020
## shooting:attacking_finishing -6.885e+04 1.409e+04 -4.887
## shooting:power_shot_power -9.424e+04 1.459e+04 -6.459
## attacking_finishing:power_shot_power -1.016e+05 1.554e+04 -6.538
## attacking_crossing:skill_curve 6.257e+03 8.721e+02 7.174
## shooting:attacking_finishing:power_shot_power 1.391e+03 1.779e+02 7.816
## Pr(>|t|)
## (Intercept) 9.18e-09 ***
## overall 7.99e-13 ***
## age 3.31e-12 ***
## wage_eur < 2e-16 ***
## shooting 0.000122 ***
## attacking_finishing 1.09e-06 ***
## power_shot_power 1.48e-14 ***
## attacking_volleys 0.855611
## attacking_crossing 3.86e-12 ***
## skill_curve 7.93e-10 ***
## movement_acceleration 0.377619
## pace 0.301565
## movement_sprint_speed 0.308239
## shooting:attacking_finishing 1.32e-06 ***
## shooting:power_shot_power 2.18e-10 ***
## attacking_finishing:power_shot_power 1.34e-10 ***
## attacking_crossing:skill_curve 2.16e-12 ***
## shooting:attacking_finishing:power_shot_power 2.47e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3599000 on 598 degrees of freedom
## Multiple R-squared: 0.893, Adjusted R-squared: 0.89
## F-statistic: 293.6 on 17 and 598 DF, p-value: < 2.2e-16
# Score the fitted forward model on the hold-out rows and compare the
# predictions against the observed market values.
# NOTE(review): the recorded run reports MPE = -Inf and MAPE = Inf, which may
# point to zero values in value_eur -- TODO confirm.
flm.pred <- predict(object = flm, newdata = valid.t)
accuracy(flm.pred, valid.t[["value_eur"]])
## ME RMSE MAE MPE MAPE
## Test set -659114.9 4314508 2646498 -Inf Inf
# Show the name and listed value of the forward(s) rated 87 overall at age 30.
forwards[
  with(forwards, overall == 87 & age == 30),
  c("short_name", "value_eur")
]
## short_name value_eur
## 1601 C. Immobile 48500000
# Predict the market value for a single hand-entered attribute profile with
# the fitted forward model.
predict(
  flm,
  newdata = data.frame(
    overall = 87,
    age = 30,
    wage_eur = 125000,
    potential = 87,
    pace = 84,
    shooting = 88,
    attacking_finishing = 93,
    attacking_volleys = 85,
    skill_curve = 70,
    movement_acceleration = 82,
    movement_sprint_speed = 85,
    power_shot_power = 86,
    attacking_crossing = 55
  )
)
## 1
## 47027303
In the above model, C. Immobile's market value is predicted to be about €47.0 million, compared with his listed value of €48.5 million.
To find the best playing eleven within a given budget, we use the classic 4-3-3 formation in football.
# Run the helper script (presumably where best_eleven() is defined -- confirm
# in fun2.R), then request a line-up for a budget of 100 million and print it.
source(file = "fun2.R")
playingEleven <- best_eleven(budget = 1e8, samplefifa21)
playingEleven
## short_name age team_position value_eur
## 4177 J. Omlin 26 GK 8500000
## 3762 J. Justin 22 LB 5500000
## 206 A. Long 27 FB 7000000
## 8081 V. Nelsson 21 FB 8000000
## 107 T. Tomiyasu 21 RB 4900000
## 2386 E. Atuesta 23 DMF 5500000
## 6962 R. McGree 21 CMF 3300000
## 7121 R. McCrorie 22 CMF 1200000
## 6671 P. Zinckernagel 25 RW 5000000
## 2062 D. Kutesa 22 LW 1600000
## 1550 C. Bassogog 24 ST 5000000
# Count players per nationality and store the counts as a data frame with
# the columns Var1 (nationality) and Freq (number of players).
country <- data.frame(table(samplefifa21$nationality))
head(country, n = 6L)
## Var1 Freq
## 1 Albania 7
## 2 Algeria 15
## 3 Angola 4
## 4 Antigua & Barbuda 1
## 5 Argentina 276
## 6 Armenia 1
# World polygons used both as the base layer and as the lookup map for the
# per-country fill.
WorldData <- fortify(map_data("world"))

# Draw every region in white first, then overlay the regions listed in
# `country`, filled by player count.
# NOTE(review): the overlay is keyed on Var1 against the map's region names,
# so nationalities spelled differently from the map presumably stay white --
# confirm coverage.
player_map <- ggplot() +
  geom_map(
    mapping = aes(x = long, y = lat, group = group, map_id = region),
    data = WorldData,
    map = WorldData,
    fill = "white",
    colour = "#7f7f7f",
    size = 0.5
  ) +
  geom_map(
    mapping = aes(fill = Freq, map_id = Var1),
    data = country,
    map = WorldData,
    colour = "#7f7f7f",
    size = 0.5
  ) +
  scale_fill_continuous(
    low = "#E67E22",
    high = "#1ABC9C",
    guide = "colorbar"
  ) +
  coord_map("rectangular", lat0 = 0, xlim = c(-180, 180), ylim = c(-60, 90)) +
  labs(fill = "Frequency", title = "Distribution of Players across the globe") +
  theme_bw()
player_map